
cd('.')
addpath('matlab functions')

D0 = readtable('NC Data/SCC_4to5.csv');
D0.intercept(:) = 1;

numstud = 0:100;
numsim = 2000;

OUT = struct();

for sbj = {'math','reading'}

    vamdl = load(sprintf('output/main/vam_%s_matching.mat',sbj{1}));
    vadist = vamdl.VAdistEstimates;
    controls = vamdl.controls;
    tchcoef = vamdl.tchcoef;
    outcome = vamdl.modspecs.outcome;

    lvl = startsWith(tchcoef,'yr');

    gm = gmdistribution(vadist.mean',vadist.cov,vadist.pr);

    D = D0(~isnan(D0.(outcome)),:);
    Z = D{:,tchcoef};
    Zcov = cov(Z,1);
    ZE2 = (Z'*Z)./size(Z,1); 

    OUT.(sbj{1}).rand = struct('x',numstud,'y_tot',nan(size(numstud)),'y_match',nan(size(numstud)));

    i = 0;
    for n = numstud
        i = i + 1;
        fprintf('%d \n',n)
        if n==0
            OUT.(sbj{1}).rand.y_tot(i) = trace(vamdl.VAmomentEstimates.cov*ZE2);
            OUT.(sbj{1}).rand.y_match(i) = trace(vamdl.VAmomentEstimates.cov(~lvl,~lvl)*Zcov(~lvl,~lvl));
            continue
        end
        var_tot = zeros([numsim 1]);
        var_match = zeros([numsim 1]);
        for s = 1:numsim
            Ds = datasample(D,n);
            Ds.teacherid(:) = 1;
            Ds.sid = (1:n)';
            Ds.(outcome) = Ds{:,controls.Properties.RowNames}*controls.Coefficient ...
                + Ds{:,tchcoef}*random(gm,1)' + randn([n 1])*sqrt(vadist.residvar);
            vadata = genvadata(Ds,vamdl.modspecs,vamdl.controls);
            [~,vapost] = vaposterior(vamdl.VAdistEstimates,vadata);
            MOM = gmmom(vapost);
            var_tot(s) = trace(MOM.cov*ZE2);
            var_match(s) = trace(MOM.cov(~lvl,~lvl)*Zcov(~lvl,~lvl));
        end
        OUT.(sbj{1}).rand.y_tot(i) = mean(var_tot);
        OUT.(sbj{1}).rand.y_match(i) = mean(var_match);
    end

    fit_tot = fit(OUT.(sbj{1}).rand.x',OUT.(sbj{1}).rand.y_tot','smoothingspline');
    OUT.(sbj{1}).rand.y_tot = fit_tot(OUT.(sbj{1}).rand.x')';

    fit_match = fit(OUT.(sbj{1}).rand.x',OUT.(sbj{1}).rand.y_match','smoothingspline');
    OUT.(sbj{1}).rand.y_match = fit_match(OUT.(sbj{1}).rand.x')';

    W = zeros([numel(vamdl.ValueAddedEstimates) 3]);
    for j = 1:numel(vamdl.ValueAddedEstimates)
        MOM = gmmom(vamdl.ValueAddedEstimates(j).vadist);
        W(j,1) = sum(D.teacherid==vamdl.ValueAddedEstimates(j).teacherid);
        W(j,2) = trace(MOM.cov*ZE2);
        W(j,3) = trace(MOM.cov(~lvl,~lvl)*Zcov(~lvl,~lvl));
    end
    W = groupsummary(array2table(W,'VariableNames',{'n','var_tot','var_match'}),'n','mean',{'var_tot','var_match'});

    OUT.(sbj{1}).obs = struct('x',W.n,'y_tot',W.mean_var_tot,'y_match',W.mean_var_match);
    
end

plotformat = {'interpreter', 'latex','FontWeight','normal','FontSize',10,'FontName','Times'};
axisformat = {'FontUnits','points','FontWeight','normal','FontSize',10,'FontName','Times'};

%-- TOTAL --
FOOTER = .08;
DIM = [2 2];
OUTPOS = subplot_outpos(DIM,.01,FOOTER,.02);
%OUTPOS = {[0.0100 0.1700 0.4600 0.8200] [0.4900 0.1700 0.4800 0.8200]};

FIGRATIO = 1;

close all
pos = get(gcf,'Position');
pos(3) = sqrt((pos(3)*pos(4)/FIGRATIO));
pos(4) = pos(3)*FIGRATIO;
set(gcf,'Position',pos)

i = 1;
for sbj = {'math','reading'}
    for o = {'y_tot','y_match'}
    
        [COL,ROW] = ind2sub(flip(DIM),i);
        subplot(DIM(1),DIM(2),i)
        plot(OUT.(sbj{1}).rand.x,OUT.(sbj{1}).rand.(o{1}),'-k')
        hold on
        plot(OUT.(sbj{1}).obs.x,OUT.(sbj{1}).obs.(o{1}),'.k','MarkerSize',3)
        xlabel('Number of Students',plotformat{:});
        if i==1
            title('Math - Total VA',plotformat{:});
            YLIM = [0 .06];
        elseif i==3
            title('Reading - Total VA',plotformat{:});
            YLIM = [0 0.03];
        elseif i==2
            title('Math - Match Component',plotformat{:});
            YLIM = [0 0.008];
        elseif i==4
            title('Reading - Match Component',plotformat{:});
            YLIM = [0 0.008];
        end    
        set(gca,'box','off','YLim',YLIM,'XLim',[0 100],'XTick',[0 25 50 75 100],'OuterPosition',OUTPOS{ROW,COL},axisformat{:});
        ax = gca;
        ax.YAxis.Exponent = 0;
        i = i + 1;
    end
end
hL = legend({'Randomly Assigned Students','Empirical Assigments'} ...
    ,'Orientation','horizontal',plotformat{:});
set(hL,'Position',[(1-hL.Position(3))/2 (FOOTER-hL.Position(4))/2 hL.Position(3) hL.Position(4)]);

set(gcf,'PaperPositionMode', 'manual','PaperUnits','inches'...
        ,'PaperPosition',[0 0 [1 FIGRATIO]*4.5],'PaperSize',[1 FIGRATIO]*4.5)
print('-dpdf','tables and figures/graphics/va_prec.pdf','-r2000')
close all  

